library(tidyverse)
library(modelsummary)
library(car)
library(survival)
library(clubSandwich)
library(marginaleffects)

load("GR.Rdata")

######### TABLES ######### 
###### TABLE 1 ########
m1 <- lm(dep ~ Conventicle + RMV_PC,
         data = M)
m2 <- lm(dep ~ Tenure + UStA + UGlasgow + UEdinburgh + UAberdeen,
         data = M)
m3 <- lm(dep ~ Burgh + Market_20km + 
           Port_20km + Ruggedness + Court_20km + 
           Population + Calories,
         data = M)
m4 <- lm(dep ~ Conventicle + RMV_PC +  
           Tenure + UStA + UGlasgow + UEdinburgh + UAberdeen  + Burgh + Market_20km + 
           Port_20km + Ruggedness + Court_20km + Calories + Population,
         data = M)

##### defining coefficient mapping to tidy presentation
cmap <- c("Conventicle" = "Conventicle",
          "RMV_PC" = "Militants/1000 residents",
          "Tenure" = "Years tenure",
          "UAberdeen" = "U. Aberdeen",
          "UGlasgow" = "U. Glasgow",
          "UEdinburgh" = "U. Edinburgh",
          "UStA" = "U. St Andrews",
          "Burgh" = "Burgh",
          "Market_20km" = "Market",
          "Port_20km" = "Port",
          "Court_20km" = "Court",
          "Ruggedness" = "Ruggedness",
          "Calories" = "Caloric potential",
          "Population" = "Population (1000s)",
          "(Intercept)" = "Intercept")

##### Model output replicating Table 1. Add 'output = "latex"' to get LaTeX code. 
modelsummary(models = list(m1, m2, m3, m4), stars = c('*' = .05, '**' = .01, '***' = .001),
             ##### cluster-robust SEs:
             vcov = list(vcovCR(m1, cluster = M$Presbytery, type = 'CR1'),
                         vcovCR(m2, cluster = M$Presbytery, type = 'CR1'),
                         vcovCR(m3, cluster = M$Presbytery, type = 'CR1'),
                         vcovCR(m4, cluster = M$Presbytery, type = 'CR1')),
             ##### tidying variable names:
             coef_map = cmap)

###### test for equivalence of Aberdeen and Edinburgh
linearHypothesis(m4, c('UAberdeen = UEdinburgh'),
                      vcov = vcovCR(m4, cluster = M$Presbytery, type = 'CR1'))

###### TABLE 2 #####
Int1 <- lm(dep ~ Conventicle * UAberdeen + RMV_PC +  
             Tenure +
             UStA + UGlasgow + UEdinburgh + UAberdeen + Burgh + Market_20km + 
             Port_20km + Ruggedness + Court_20km + Calories + Population,
           data = M)
Int2 <- lm(dep ~ Conventicle + RMV_PC * UAberdeen + 
             Tenure +
             UStA + UGlasgow + UEdinburgh + UAberdeen + Burgh + Market_20km + 
             Port_20km + Ruggedness + Court_20km + Calories + Population,
           data = M)

##### Model output replicating Table 2. Add 'output = "latex"' to get LaTeX code. 
modelsummary(models = list(Int1, Int2),
             stars = c('*' = .05, '**' = .01, '***' = .001),
             coef_map = c("Conventicle" = "Conventicle",
                          "RMV_PC" = "Militants/1k residents",
                          "UAberdeen" = "U. Aberdeen",
                          "Conventicle:UAberdeen" = "Conventicle × U. Aberdeen",
                          "RMV_PC:UAberdeen" = "Militants × U. Aberdeen"))

###### APPENDIX MODELS #######
### SURVIVAL: TABLE A1 ###
M$S <- 0 # start time t=0
M$E <- ifelse(M$End > 1702, 1702, M$End) # focusing on William's purge, so if survive William, observation is right-censored
M$E <- M$E-1688 # end time equal to years after William's accession
M$surv <- Surv(time=M$S, time2 = M$E, event = M$dep)

c_ph1 <- coxph(surv ~ Conventicle + RMV_PC,
               data = M, cluster = Presbytery)
c_ph2 <- coxph(surv ~ Tenure +
                 UStA + UGlasgow + UEdinburgh + UAberdeen,
               data = M, cluster = Presbytery)
c_ph3 <- coxph(surv ~ Burgh + Market_20km + 
                 Port_20km + Ruggedness + Court_20km + Calories + Population,
               data = M, cluster = Presbytery)
c_ph4 <- coxph(surv ~ Conventicle + RMV_PC + 
                 Tenure + UStA + UGlasgow + UEdinburgh + UAberdeen + Burgh + Market_20km + 
                 Port_20km + Ruggedness + Court_20km + Calories + Population,
               data = M, cluster = Presbytery)

modelsummary(list(c_ph1, c_ph2, c_ph3, c_ph4),
             coef_map = cmap,
             stars = T)

### LOGITS: TABLE A2 ###
l1 <- glm(dep ~ Conventicle + RMV_PC,
                data = M, family = 'binomial'(link = 'logit'))
l2 <- glm(dep ~ Tenure + UStA + UGlasgow + UEdinburgh + UAberdeen,
         data = M, family = 'binomial'(link = 'logit'))
l3 <- glm(dep ~ Burgh + Market_20km + 
           Port_20km + Ruggedness + Court_20km + 
           Population + Calories,
         data = M, family = 'binomial'(link = 'logit'))
l4 <- glm(dep ~ Conventicle + RMV_PC +  
           Tenure + UStA + UGlasgow + UEdinburgh + UAberdeen  + Burgh + Market_20km + 
           Port_20km + Ruggedness + Court_20km + Calories + Population,
         data = M, family = 'binomial'(link = 'logit'))

modelsummary(models = list(l1, l2, l3, l4),
             stars = T,
             vcov = list(vcovCR(l1, cluster = M$Presbytery, type = 'CR1'),
                         vcovCR(l2, cluster = M$Presbytery, type = 'CR1'),
                         vcovCR(l3, cluster = M$Presbytery, type = 'CR1'),
                         vcovCR(l4, cluster = M$Presbytery, type = 'CR1')),
             coef_map = cmap)

### CONTINUOUS: TABLE A3 ###
c3 <- lm(dep ~ Burgh + Distance_to_Market + 
                Distance_to_Port + Ruggedness + Distance_to_Court + 
                Population + Calories,
              data = M)
cl3 <- lm(dep ~ Burgh + I(log(Distance_to_Market+1)) + 
                    I(log(Distance_to_Port+1)) + Ruggedness + I(log(Distance_to_Court+1)) + 
                    Population + Calories,
                  data = M)
c4 <- lm(dep ~ Conventicle + RMV_PC +  
                Tenure +
                UStA + UGlasgow + UEdinburgh + UAberdeen + Burgh + Distance_to_Market + 
                Distance_to_Port + Ruggedness + Distance_to_Court + Population + Calories,
              data = M)
cl4 <- lm(dep ~ Conventicle + RMV_PC +  
                    Tenure +
                    UStA + UGlasgow + UEdinburgh + UAberdeen + Burgh + I(log(Distance_to_Market+1)) + 
                    I(log(Distance_to_Port+1)) + Ruggedness + I(log(Distance_to_Court+1)) + Population + Calories,
                  data = M)
modelsummary(models = list(c3, cl3, c4, cl4),
             stars = T,
             vcov = list(vcovCR(c3, cluster = M$Presbytery, type = 'CR1'),
                         vcovCR(cl3, cluster = M$Presbytery, type = 'CR1'),
                         vcovCR(c4, cluster = M$Presbytery, type = 'CR1'),
                         vcovCR(cl4, cluster = M$Presbytery, type = 'CR1')),
             coef_map = c("Conventicle" = "Conventicle",
                          "RMV_PC" = "Militants/1000 residents",
                          "tenure" = "Years tenure",
                          "UAberdeen" = "U. Aberdeen",
                          "UGlasgow" = "U. Glasgow",
                          "UEdinburgh" = "U. Edinburgh",
                          "UStA" = "U. St Andrews",
                          "burgh" = "Burgh",
                          "Distance_to_Market" = "Market",
                          "I(log(Distance_to_Market + 1))" = "Market",
                          "Distance_to_Port" = "Port",
                          "I(log(Distance_to_Port + 1))" = "Port",
                          "Ruggedness" = "Ruggedness",
                          "Distance_to_Court" = "Court",
                          "I(log(Distance_to_Court + 1))" = "Court",
                          "Calories" = "Caloric potential",
                          "Population" = "Urban population (1000s)",
                          "(Intercept)" = "Intercept"))

### LOWLAND ONLY: TABLE A4 ###
lowland <- c("Aberdeen", "Fife", "Perth and Sterling", "Angus and Mearns", "Edinburgh",
             "Glasgow and Ayr", "Merse and Teviotdale", "Dumfries", "Galloway", "Moray")

low1 <- lm(dep ~ Conventicle + RMV_PC,
             data = subset(M, Synod %in% lowland))
low2 <- lm(dep ~ Tenure +
               UStA + UGlasgow + UEdinburgh + UAberdeen,
             data = subset(M, Synod %in% lowland))
low3 <- lm(dep ~ Burgh + Market_20km + 
               Port_20km + Ruggedness + Court_20km + 
               Population + Calories,
             data = subset(M, Synod %in% lowland))
low4 <- lm(dep ~ Conventicle + RMV_PC +  
               Tenure + UStA + UGlasgow + UEdinburgh + UAberdeen  + Burgh + Market_20km + 
             Port_20km + Ruggedness + Court_20km + 
             Population + Calories,
             data = subset(M, Synod %in% lowland))
low4a <- lm(dep ~ Conventicle + 
                Tenure + UStA + UGlasgow + UEdinburgh + UAberdeen  + Burgh + Market_20km + 
              Port_20km + Ruggedness + Court_20km + 
              Population + Calories,
              data = subset(M, Synod %in% lowland))
low4b <- lm(dep ~ RMV_PC + 
                Tenure +
                UStA + UGlasgow + UEdinburgh + UAberdeen  + Burgh + Market_20km + 
              Port_20km + Ruggedness + Court_20km + 
              Population + Calories,
              data = subset(M, Synod %in% lowland))

modelsummary(list(low1, low2, low3, low4, low4a, low4b), stars = T, 
             coef_map =cmap,
             vcov = list(
               vcovCR(low1, cluster = subset(M, Synod %in% lowland)$Presbytery, type = 'CR1'),
               vcovCR(low2, cluster = subset(M, Synod %in% lowland)$Presbytery, type = 'CR1'),
               vcovCR(low3, cluster = subset(M, Synod %in% lowland)$Presbytery, type = 'CR1'),
               vcovCR(low4, cluster = subset(M, Synod %in% lowland)$Presbytery, type = 'CR1'),
               vcovCR(low4a, cluster = subset(M, Synod %in% lowland)$Presbytery, type = 'CR1'),
               vcovCR(low4b, cluster = subset(M, Synod %in% lowland)$Presbytery, type = 'CR1')))



######### FIGURES ######### 
##### FIGURE 4 #####
###### Depicting substantive effects of full model
preds <- predictions(model = m4, newdata = 'mean', 
                     variables = list("Tenure" = seq(0:20), "Calories" = c(4.07, 8.49), "Conventicle" = c(0,1)))
subeff_plot <- ggplot(preds, aes(x = Tenure, color = as.factor(Calories), group = as.factor(Calories))) +
  geom_line(aes(y = estimate), linewidth = 1.33) +
  geom_line(aes(y = conf.low), lty = 2) +
  geom_line(aes(y=conf.high), lty = 2) +
  facet_wrap(~Conventicle, labeller = as_labeller(c(`0` = "No Conventicle", `1`= "Conventicle"))) +
  theme_minimal()+
  theme(text = element_text(family = 'Open Sans'),
        legend.title.align = .5,
        legend.text.align = .5,
        panel.grid.minor = element_blank()) +
  labs(x = "Years of tenure",
       y = "Predicted probability of purge",
       color = "Caloric potential",
       linetype = "Caloric potential") +
  scale_color_viridis_d(end = .8, option = 'magma',
                        labels = c("20th percentile", "80th percentile")) +
  scale_y_continuous(labels = scales::percent) 

##### FIGURE 5 ##### 
p1 <- slopes(Int1, newdata = "mean", variables = c("UAberdeen"), by = "Conventicle") %>%
  ggplot(aes(x = as.factor(Conventicle), y = estimate)) +
  geom_point() +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0) +
  theme_minimal() +
  theme(text=element_text(family = "Open Sans"),
        axis.title.x = element_blank(),
        axis.text.x = element_text(size = 11)) +
  scale_x_discrete(labels = c("No conventicle", "Conventicle")) +
  lims(y = c(-.3, .4)) +
  labs(y = "Marginal effect of U. Aberdeen attendance")

p2<- slopes(Int2, newdata = "mean", variables = c("UAberdeen"), by = "RMV_PC") %>%
  ggplot(aes(x = RMV_PC)) +
  geom_line(aes(y = estimate)) +
  geom_line(aes(y = conf.low), lty = 2) +
  geom_line(aes(y = conf.high), lty = 2) +
  theme_minimal() + 
  theme(text=element_text(family = "Open Sans"),
        axis.title.y = element_blank()) +
  lims(x = c(0, 15), y = c(-.3, .4)) +
  labs(x = "Militants per 1k residents")
